/*
  Copyright 2008 Google Inc.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.

  Contributors:
    * Brad Neuberg
    * Thibaud Lopez Schneider
*/

// make everything here anonymous, except for a global variable named
// searchTools that others can script if they want to attach their own
// search UI
var searchTools = function() {
  // a Gears database
  var db = null;

  // handles the UI
  var ui = null;

  // the name of our database
  var dbName = null;

  // a searchTools instance; this deliberately refers to the outer
  // 'searchTools' variable, which is assigned again when this function
  // returns
  searchTools = null;

  // MIME types whose documents we are willing to download and index
  var INDEXABLE_MIME_TYPES = {
    'text/html': true,
    'text/plain': true,
    'application/xml': true,
    'text/xml': true,
    'application/xhtml+xml': true
  };

  /*
    Closes a Gears ResultSet, tolerating the case where db.execute()
    threw before the ResultSet was ever assigned. Without this guard a
    'finally { rs.close(); }' replaces the real database error with a
    TypeError.

    NOTE: this helper lives in the closure, so it must NOT be used from
    any function that is stringified and shipped to a Gears worker.
  */
  function closeResultSet(rs) {
    if (rs) {
      rs.close();
    }
  }

  /*
    Reads the manifest version persisted in ClientMetadata.

    @returns The stored version, or null if there is no row.
  */
  function readDBVersion() {
    var version = null;
    var rs = null;
    try {
      rs = db.execute('SELECT version FROM ClientMetadata LIMIT 1');
      if (rs.isValidRow()) {
        version = rs.fieldByName('version');
      }
    } finally {
      closeResultSet(rs);
    }

    return version;
  }

  /*
    Reads the 'finishedIndexing' flag persisted in ClientMetadata.

    @returns Boolean - true only if row 1 exists and its flag equals 1.
  */
  function readFinishedIndexing() {
    var finishedIndexing = false;
    var rs = null;
    try {
      rs = db.execute('SELECT finishedIndexing FROM ClientMetadata '
                      + 'WHERE ROWID = 1');
      if (rs.isValidRow() && rs.fieldByName('finishedIndexing') == 1) {
        finishedIndexing = true;
      }
    } finally {
      closeResultSet(rs);
    }

    return finishedIndexing;
  }

  /*
    Decodes one component of a query string ('+' means space). A
    malformed escape sequence is returned undecoded rather than
    aborting the page initialization.
  */
  function decodeQueryComponent(text) {
    text = text.replace(/\+/g, ' ');
    try {
      return decodeURIComponent(text);
    } catch (e) {
      return text;
    }
  }

  /*
    Returns the decoded value of the first query-string parameter with
    the given name on the current page URL, or '' if it is absent.
  */
  function getQueryParam(name) {
    var search = window.location.search;
    if (!search || search.length < 2) {
      return '';
    }

    var pairs = search.substring(1).split('&');
    for (var i = 0; i < pairs.length; i++) {
      var eq = pairs[i].indexOf('=');
      var key = (eq == -1) ? pairs[i] : pairs[i].substring(0, eq);
      if (decodeQueryComponent(key) == name) {
        return (eq == -1) ? ''
                          : decodeQueryComponent(pairs[i].substring(eq + 1));
      }
    }

    return '';
  }

  /*
    Creates a SearchTools object, which helps us do client-side
    search over a set of documents downloaded remotely using
    SQLite's Full-Text Search abilities.
  */
  pu.declare('SearchTools', null, {
    forceMimeType: false,
    appname_: 'searchtools_',

    constructor: function() {
      // 'pu' is a re-namespaced small build of Dojo that we use internally.
      // we re-namespace it so that it will not collide with others who
      // might be using Dojo on their own pages
      pu.connect(pu, 'loaded', this, this.ready_);
    },

    /**
      Does the actual client-side search. Alerts and returns without
      searching if the query is empty.

      @param query : String The search string.
    */
    search: function(query) {
      if (!query) {
        alert('Please enter a search string');
        return;
      }

      // TODO: make sure query term doesn't have bad symbols
      // and possibly re-write them
      ui.status("Searching...");
      var s = new Searcher();
      s.search(query, pu.hitch(ui, ui.showResults));
    },

    /**
      Prints and handles errors in a consistent way: shows the error in
      the status area, disables the search controls if they exist, and
      re-throws.

      @param err : Error Error object containing issue.
    */
    handleError: function(err) {
      // '||' (not bitwise '|'): fall back to the error itself when it
      // carries no message
      ui.status('Error initializing search: ' + (err.message || err));

      // pu.query() results are always truthy, so test the length
      var inputs = pu.query('input.gsc-input');
      if (inputs.length) {
        inputs[0].disabled = true;
      }
      var buttons = pu.query('.gsc-search-button');
      if (buttons.length) {
        buttons[0].disabled = true;
      }

      throw err;
    },

    /**
      Runs once 'pu' has loaded: embeds the UI, opens the database and
      either re-indexes (manifest version changed or indexing never
      finished) or marks the UI ready straight away.
    */
    ready_: function() {
      // init the UI
      var autoSearch = this.getAutoSearch_();
      var id = this.getWidgetID_();
      ui = new UI(id, autoSearch);
      var continueInit = ui.embed();
      if (!continueInit) {
        return;
      }

      // see if the page author wants to force the MIME type of the
      // files to index
      this.forceMimeType = this.getForceMimeType_();

      // make our client-side database
      dbName = this.getDatabaseName_();
      // not every browser defines a console
      if (window.console && console.info) {
        console.info('Database name: ' + dbName);
      }
      this.createDatabaseTables_();

      // fetch our Gears manifest file and process it;
      // this kicks off the document fetching and indexing process
      // if needed
      var url = this.getSearchManifestURL_();

      // Thibaud: faster manifest version check -- compare the version
      // declared on the <link rel="search.urls"> element with the one in
      // the database and only fetch the manifest when they differ
      var oldVersion = this.getDBVersion_();
      var finishedIndexing = this.getFinishedIndexing_();
      var links = pu.query("link[rel='search.urls']");
      // the link element is optional (see getSearchManifestURL_); with no
      // declared version we cannot take the shortcut
      var thisVersion = links.length ? links[0].getAttribute('version')
                                     : null;

      if (thisVersion === null || oldVersion !== thisVersion
          || !finishedIndexing) {
        var searchManifest = new SearchManifest(url);
      } else {
        ui.ready();
      }
    },

    /** Returns the manifest version stored in the database, or null. */
    getDBVersion_: function() {
      return readDBVersion();
    },

    /** Returns true if a previous indexing run completed. */
    getFinishedIndexing_: function() {
      return readFinishedIndexing();
    },

    /**
      Returns the ID of the element to embed the search widget into.
      Defaults to 'st-widget'; overridden by the 'content' attribute of a
      meta element named 'search.widget'.
    */
    getWidgetID_: function() {
      var id = 'st-widget';
      var metas = pu.query("meta[name='search.widget']");
      if (metas.length) {
        id = metas[0].getAttribute('content');
      }

      return id;
    },

    /**
      Returns the URL of the search manifest. Defaults to 'search.txt';
      overridden by the 'href' attribute of a link element with
      rel='search.urls'.
    */
    getSearchManifestURL_: function() {
      var url = 'search.txt';
      var links = pu.query("link[rel='search.urls']");
      if (links.length) {
        url = links[0].getAttribute('href');
      }

      return url;
    },

    /**
      Controls whether to search while you type; can help with
      larger database corpuses to turn this off. Defaults to true.

      Usage: <meta name="search.auto" content="false">
    */
    getAutoSearch_: function() {
      var autoSearch = true;
      var metas = pu.query("meta[name='search.auto']");
      if (metas.length) {
        var content = metas[0].getAttribute('content');
        autoSearch = (content == 'true' || content == true);
      }

      return autoSearch;
    },

    /**
      Controls whether we filter based on MIME type. Useful if you
      know what the MIME type of all your files is and you can
      manually say what they are; good for very large corpuses to
      increase startup performance. Returns false when not configured,
      otherwise the configured MIME type string.

      Usage: <meta name="search.force-mime-type" content="text/html">
    */
    getForceMimeType_: function() {
      var forceMimeType = false;
      var metas = pu.query("meta[name='search.force-mime-type']");
      if (metas.length) {
        forceMimeType = metas[0].getAttribute('content');
      }

      return forceMimeType;
    },

    /**
      Derives the Gears database name from the page URL (without anchor
      or query string), prefixed with appname_.
    */
    getDatabaseName_: function() {
      var url = window.location.href;
      url = url.replace(/#(.*)$/, ''); // remove anchor
      // Thibaud: remove the querystring to enable URL-based search
      // without creating a new database for each query
      if (url.indexOf("?") > 0) {
        url = url.substring(0, url.indexOf("?"));
      }

      // turn the following characters into underscores:
      // / \ : * ? " < > | ; , . - =
      // Gears should be able to allow spaces in db names but doesn't,
      // so transform those too
      url = url.replace(/\/|\\|:|\*|\?|\"|\<|\>|\||\;|\,|\.|\-|\=|[ ]/g, '_');

      url = this.appname_ + url;

      // Gears has a 64 character limit
      url = url.substring(0, 63);

      return url;
    },

    /*
      Creates the tables necessary for client-side search to work.
    */
    createDatabaseTables_: function() {
      db = google.gears.factory.create('beta.database');
      db.open(dbName);
      db.execute('CREATE TABLE IF NOT EXISTS ClientMetadata '
                 + '(version VARCHAR UNIQUE, '
                 + ' schemaVersion VARCHAR, '
                 + ' finishedIndexing INTEGER)');

      // make sure the single metadata row exists
      var rs = null;
      try {
        rs = db.execute('SELECT COUNT(*) FROM ClientMetadata');
        var count = rs.field(0);
        if (!count) {
          db.execute('INSERT INTO ClientMetadata (schemaVersion) '
                     + 'VALUES (?)', ['0.2']);
        }
      } finally {
        closeResultSet(rs);
      }

      try {
        db.execute('CREATE VIRTUAL TABLE ClientSearch '
                   + 'USING fts2(url, title, mimeType, content)');
      } catch (e) {
        // deliberate best-effort: this statement fails when the table
        // already exists, which is the normal case on every visit after
        // the first
      }
    },

    /**
      Called when we are finished indexing and are ready to
      be searched.
    */
    finishedIndexing_: function() {
      // persist that we successfully indexed
      db.execute('UPDATE ClientMetadata SET finishedIndexing = 1 '
                 + 'WHERE ROWID = 1');

      ui.ready();

      // for unit testing
      if (this.finishedCallback_) {
        this.finishedCallback_.callback();
      }
    }
  });

  /*
    Encapsulates the search UI.
  */
  pu.declare('UI', null, {
    currentProgress: 0,
    finalProgress: 0,
    widget_: null,
    autoSearch_: true,

    /**
      @param id : String ID of the element to embed the widget into.
      @param autoSearch : Boolean Whether to search on every keystroke.
      Throws an Error if no element with that ID exists.
    */
    constructor: function(id, autoSearch) {
      this.widget_ = pu.byId(id);
      if (!this.widget_) {
        var err = new Error('You must have an HTML element with the ID '
                            + id + ' in order to have client-side '
                            + 'search using PubTools');
        throw err;
      }

      // NOTE: stored as 'autoSearch' (no trailing underscore); embed()
      // reads this same property
      this.autoSearch = autoSearch;
    },

    /**
      Writes the search box into the widget element and wires up its
      events. The controls start out disabled; ready() enables them.

      @returns Boolean - false if Gears is missing or the user denied
      permission, true otherwise.
    */
    embed: function() {
      // Gears present?
      if (!this.gearsInstalled_(this.widget_)) {
        return false;
      }

      // ask the user for permission to use Gears if necessary
      if (!google.gears.factory.hasPermission) {
        var msg = 'This site would like to use Google Gears to enable fast, '
                  + 'as-you-type searching of its documents.';
        var allowed = google.gears.factory.getPermission(null, null, msg);
        if (!allowed) {
          return false;
        }
      }

      // HTML structure for search box similar to the Google Ajax Search
      // Control:
      // http://code.google.com/apis/ajaxsearch/documentation/#StylingTheSearchControl
      // Thibaud: added id="search" to the input
      // NOTE(review): the original markup was lost in this copy of the
      // file. This is a minimal reconstruction providing every hook the
      // code queries (form.gsc-search-box, input.gsc-input,
      // .gsc-search-button, #st-status,
      // .searchtools.gsc-resultsbox-visible) -- confirm against the
      // deployed markup/stylesheet.
      var html =
          '<form class="gsc-search-box" accept-charset="utf-8">'
          + '<input id="search" class="gsc-input" type="text" '
          +   'autocomplete="off" title="search"/> '
          + '<input class="gsc-search-button" type="submit" '
          +   'value="Search" title="search"/>'
          + '</form>'
          + '<div id="st-status"></div>'
          + '<div class="searchtools gsc-resultsbox-visible"></div>';
      this.widget_.innerHTML = html;

      // search when user clicks search button
      var searchButton = pu.query('.gsc-search-button')[0];
      searchButton.disabled = true;
      pu.connect(searchButton, 'onclick', function() {
        var query = pu.query('input.gsc-input')[0].value;
        searchTools.search(query);
      });

      var inputField = pu.query('input.gsc-input')[0];
      inputField.disabled = true;

      // a single key handler: Enter always searches; any other key
      // searches only in as-you-type mode and only for a non-empty
      // query. (Two separate handlers ran the search twice on Enter.)
      var autoSearch = this.autoSearch;
      pu.connect(inputField, 'onkeyup', function(evt) {
        ui.clearResults();
        var query = pu.query('input.gsc-input')[0].value;
        var enterPressed = (evt.keyCode == 13 || evt.keyCode == 3);
        if (enterPressed) {
          searchTools.search(query);
        } else if (autoSearch && query) {
          searchTools.search(query);
        }
      });

      // cancel the search form button's default submission
      pu.query('.gsc-search-box')[0].onsubmit = function() {
        return false;
      };

      return true;
    },

    /**
      Prints a status message to the user in the UI.
    */
    status: function(msg) {
      pu.byId('st-status').innerHTML = msg;
    },

    /** Empties the status area. */
    clearStatus: function() {
      pu.byId('st-status').innerHTML = '';
    },

    /**
      Advances the progress counter by one step and shows the percentage.

      @param msg : String Optional text appended to the status.
    */
    tickProgress: function(msg) {
      if (!msg) {
        msg = '';
      }

      this.currentProgress++;
      // avoid NaN/Infinity when no total has been set
      var processing = this.finalProgress
          ? Math.round(this.currentProgress / this.finalProgress * 100)
          : 100;
      this.status('Processing...' + processing + '% ' + msg);
    },

    /*
      Shows the search results to the user.

      @param entries : Array An array of object literals of results. Each
      object literal has the following properties:
        title - the title of the result
        snippet - a snippet of the result (HTML, as produced by
                  Searcher.getSnippet_)
        href - the URL to the result
    */
    showResults: function(entries) {
      this.clearResults();
      this.clearStatus();

      if (entries.length == 0) {
        this.status("No results found");
        return;
      }

      var resultsArea = pu.query('.searchtools.gsc-resultsbox-visible')[0];
      var root = document.createElement('div');
      root.className = 'gsc-resultsRoot';
      resultsArea.appendChild(root);

      var results = document.createElement('div');
      results.className = 'gsc-results gsc-stResult';
      root.appendChild(results);

      for (var i = 0; i < entries.length; i++) {
        var entry = entries[i];

        var result = document.createElement('div');
        // class belongs on the individual result, not on the container
        result.className = 'gsc-stResult gsc-result';
        results.appendChild(result);

        var resultContent = document.createElement('div');
        resultContent.className = 'gs-stResult gs-result';
        result.appendChild(resultContent);

        var titleElem = document.createElement('div');
        titleElem.className = 'gs-title';
        resultContent.appendChild(titleElem);

        var titleA = document.createElement('a');
        titleA.className = 'gs-title';
        titleA.setAttribute('href', entry.href);
        titleA.setAttribute('target', '_blank');
        titleA.appendChild(document.createTextNode(entry.title));
        titleElem.appendChild(titleA);

        var snippet = document.createElement('div');
        snippet.className = 'gs-snippet';
        // innerHTML because the snippet carries bold markup around the
        // matched term
        snippet.innerHTML = entry.snippet;
        resultContent.appendChild(snippet);

        var longURL = document.createElement('div');
        longURL.className = 'gs-visibleUrl gs-visibleUrl-long';
        longURL.appendChild(document.createTextNode(entry.href));
        resultContent.appendChild(longURL);
      }
    },

    /** Removes any displayed results. */
    clearResults: function() {
      var resultsArea = pu.query('.searchtools.gsc-resultsbox-visible')[0];
      resultsArea.innerHTML = '';
    },

    /**
      Enables the search controls once the index is usable and, for
      URL-based search, runs the query given in the 'q' query-string
      parameter if there is one.
    */
    ready: function() {
      this.clearStatus();
      var searchButton = pu.query('.gsc-search-button')[0];
      searchButton.disabled = false;

      var inputField = pu.query('input.gsc-input')[0];
      inputField.disabled = false;
      inputField.focus();

      // Thibaud: URL-based search. The value is read client-side instead
      // of being injected by a server-side template, which was not valid
      // JavaScript on its own and placed an unescaped request value into
      // script. Pages without 'q' no longer trigger the empty-query
      // alert.
      var initialQuery = getQueryParam('q');
      if (initialQuery) {
        inputField.value = initialQuery;
        searchTools.search(initialQuery);
      }
    },

    /**
      Checks for Gears; if it is missing, writes installation
      instructions into the widget.

      @param widget : Element The widget's root element.
      @returns Boolean - true if Gears is available.
    */
    gearsInstalled_: function(widget) {
      if (!window.google || !window.google.gears) {
        // added by Thibaud Lopez Schneider; replaces the former link to
        // http://gears.google.com/?action=install
        widget.innerHTML = "To install Moogle Code Search please follow the Installation chapter in the Help page here below.";
        return false;
      } else {
        return true;
      }
    }
  });

  /**
    Fetches a search manifest file so that we can get a list of URLs
    to work with. Also manages the version number of this manifest,
    persisting it into the client-side database and determining if we
    even need to re-index this material, since we might already have
    indexed it locally.
  */
  pu.declare('SearchManifest', null, {
    version: null,
    urls: [],

    /**
      @param url : String - The URL of the manifest file.
    */
    constructor: function(url) {
      // fetch and process this manifest file
      this.fetch_(url)
            .addCallback(this, this.process_)
            .addErrback(searchTools, searchTools.handleError);
    },

    /**
      Fetches the manifest file.

      @param url : String - The URL of the manifest file.
      @returns Deferred - Returns a Deferred. The callback receives
      no arguments; the errback receives an Error for both transport
      failures and malformed manifests.
    */
    fetch_: function(url) {
      ui.status('Fetching manifest...');
      var d = new pu.Deferred();

      pu.xhrGet({
        url: url,
        preventCache: true,
        handleAs: 'text',
        load: pu.hitch(this, function(data) {
          var file;
          try {
            file = this.parse_(data);
          } catch (e) {
            // report a malformed manifest through our own Deferred so
            // that the errback attached by the constructor sees it
            d.errback(e);
            return data;
          }

          this.version = file.version;
          this.urls = file.urls;
          d.callback();
          return data;
        }),
        error: function(err) {
          d.errback(err instanceof Error ? err : new Error(err));
          return err;
        }
      });

      return d;
    },

    /**
      Parses the textual search manifest data into a form we can
      work with. Throws an exception if the data has an incorrect
      format.

      @param data : String Manifest text; first line 'version=...',
      then one URL per line.
      @returns Object literal {version: String, urls: Array}.
    */
    parse_: function(data) {
      // remove carriage returns for Windows text files
      data = data.replace(/\r/g, '');

      // split on new lines
      var lines = data.split('\n');

      // version should be on the first line
      var m = lines[0].match(/^version=(.*)$/);
      if (!m) {
        throw new Error('Invalid search manifest file: \n' + data);
      }
      if (!m[1]) {
        throw new Error('No version given in search manifest file');
      }

      // everything after the first line is a URL; throw away blank
      // lines (collected into a new array, because splicing while
      // iterating skips the line that follows each removal)
      var urls = [];
      for (var i = 1; i < lines.length; i++) {
        if (!/^\s*$/.test(lines[i])) {
          urls.push(lines[i]);
        }
      }

      return {version: m[1], urls: urls};
    },

    /**
      Process the manifest version and list of URLs, determining if
      we even need to index. If we do, then it kicks off fetching
      the list of URLs as actual documents.
    */
    process_: function() {
      ui.status('Processing...');

      // there are five steps in the processing:
      // filter_ request + filter_ response + download_ request
      // + download_ response + index
      ui.finalProgress = this.urls.length * 5;

      var oldVersion = this.getDBVersion_();
      var finishedIndexing = this.getFinishedIndexing_();

      // if the version has changed OR we never finished indexing in
      // the past...
      if (oldVersion !== this.version || !finishedIndexing) {
        // flag that we haven't finished indexing
        db.execute('UPDATE ClientMetadata SET finishedIndexing = NULL '
                    + 'WHERE ROWID = 1');

        // fetch these documents and index them as they return
        var docs = new Documents(this.urls);

        // persist this new manifest version
        this.putDBVersion_(this.version);
      } else {
        ui.ready();
      }
    },

    /** Returns the manifest version stored in the database, or null. */
    getDBVersion_: function() {
      return readDBVersion();
    },

    /** Persists the given manifest version into the database. */
    putDBVersion_: function(version) {
      db.execute('UPDATE ClientMetadata '
                 + 'SET version = ? '
                 + 'WHERE ROWID = 1', [version]);
    },

    /** Returns true if a previous indexing run completed. */
    getFinishedIndexing_: function() {
      return readFinishedIndexing();
    }
  });

  /**
    Takes an array of URLs from a manifest and fetches the document's
    actual values, returning textual (HTML, XML, text) documents that
    can be indexed.
  */
  pu.declare('Documents', null, {
    filtered_: 0,

    /**
      Empties the search table, then filters and downloads the URLs.

      @param urls : Array URLs from the manifest.
    */
    constructor: function(urls) {
      this.filtered_ = 0;
      db.execute('DELETE FROM ClientSearch');

      // filter the URLs to just ones we can work with, then download
      // their contents
      this.filter_(urls, pu.hitch(this, this.download_));
    },

    /**
      Takes a list of URLs and filters out only the ones we can index.
      This is done by doing an HTTP HEAD request on each one to get the
      MIME type. We only fetch and index text, XML, and HTML files.
      This is an optimization so that we don't try to fetch everything,
      as there might be many images, videos, or binary objects in the
      manifest that we don't want to re-download when indexing.

      @param urls : Array Array of URLs to do the HEAD request on.
      @param callback : Function. This callback is called with an array
      of object literals that have the following entries:
        url : String - URL to the resource
        mimeType : String - the MIME type of the resource
      All of these entries are suitable for indexing (i.e. binary types
      have been filtered out, for example)
    */
    filter_: function(urls, callback) {
      var filteredURLs = [];

      // with nothing to filter no response would ever arrive to fire
      // the callback, so finish right away
      if (!urls.length) {
        callback(filteredURLs);
        return;
      }

      // just fill out the MIME type if the page author wants to force
      // it for performance reasons
      if (searchTools.forceMimeType) {
        for (var i = 0; i < urls.length; i++) {
          filteredURLs.push({url: urls[i],
                             mimeType: searchTools.forceMimeType});
          // Thibaud: two ticks to account for the skipped HEAD request
          // and response in the progress indicator
          ui.tickProgress();
          ui.tickProgress();
        }

        callback(filteredURLs);
        return;
      }

      pu.forEach(urls, pu.hitch(this, function(url) {
        ui.tickProgress();
        var xhr = pu.xhr('HEAD', {
          url: url,
          // if we don't cache bust, then the HEAD request doesn't
          // always work on Firefox
          preventCache: true,
          load: pu.hitch(this, function(data) {
            this.filtered_++;
            ui.tickProgress();

            // the header can be missing; treat that as "not indexable"
            var mime = xhr.getResponseHeader('Content-Type') || '';
            // strip out charset/parameters and surrounding whitespace
            mime = mime.replace(/;.*$/, '')
                       .replace(/^\s+|\s+$/g, '')
                       .toLowerCase();

            if (INDEXABLE_MIME_TYPES[mime] === true) {
              filteredURLs.push({url: url, mimeType: mime});
            }

            if (this.filtered_ == urls.length) {
              callback(filteredURLs);
            }

            return data;
          }),
          error: pu.hitch(this, function(err) {
            this.filtered_++;
            if (this.filtered_ == urls.length) {
              callback(filteredURLs);
            }

            return err;
          })
        }, false);
        // from here on 'xhr' is the raw XMLHttpRequest, which is what
        // the load handler above reads the response header from
        xhr = xhr.ioArgs.xhr;
      }));
    },

    /*
      Takes an array of object literals to download and downloads their
      content, indexing each of them as the content comes in. Each of
      these object literals should have two entries, 'url' to the
      resource and 'mimeType' giving the mime type of the resource.
    */
    download_: function(downloadMe) {
      // the Indexer only reports completion after receiving its last
      // document; with no documents at all we must finish ourselves
      if (!downloadMe.length) {
        searchTools.finishedIndexing_();
        return;
      }

      var idx = new Indexer(downloadMe.length);
      pu.forEach(downloadMe, function(entry) {
        var url = entry.url;
        var mimeType = entry.mimeType;
        ui.tickProgress();
        pu.xhrGet({
          url: url,
          load: function(data) {
            ui.tickProgress();
            idx.index(url, mimeType, data);
            return data;
          },
          error: function(err) {
            searchTools.handleError(err);
            return err;
          }
        });
      });
    }
  });

  /**
    Indexes a set of documents into Gears' full-text search
    client-side database.
  */
  pu.declare('Indexer', null, {
    numDocs_: null,
    indexMe_: null,

    /**
      @param numDocs : Number The number of total docs to index.
    */
    constructor: function(numDocs) {
      this.numDocs_ = numDocs;
      // per-instance queue; an array literal on the prototype would be
      // shared (and keep growing) across every Indexer
      this.indexMe_ = [];
    },

    /*
      Queues the given document; once all numDocs documents have been
      queued, indexes them in one shot on a Gears worker.

      @param url : String URL of this document.
      @param mimeType : String MIME type for this document.
      @param doc : String Contents of the document.
    */
    index: function(url, mimeType, doc) {
      this.indexMe_.push({url: url, title: '',
                          mimeType: mimeType, content: doc});

      if (this.indexMe_.length < this.numDocs_) {
        return;
      }

      // we've got everything -- do the indexing now in one shot,
      // which is much faster than doing them all separately
      var worker = google.gears.factory.create('beta.workerpool');
      var workerScript =
          /* Thibaud: replaced getTitle_ by retitle_ */
          'var getTitle = ' + String(this.retitle_) + '; '
          /* Thibaud: added this line */
          + 'var getUrl_ = ' + String(this.getUrl_) + '; '
          + 'google.gears.workerPool.onmessage = '
          + String(this.indexWorker_);
      var childWorkerId = worker.createWorker(workerScript);
      worker.onmessage = pu.hitch(this, function(a, b, message) {
        if (message.text == 'Done') {
          searchTools.finishedIndexing_();
        } else if (message.text == 'Tick') {
          ui.tickProgress();
        }
      });

      // send the worker a message to run
      var msg = {dbName: dbName, indexMe: this.indexMe_};
      worker.sendMessage(msg, childWorkerId);
    },

    /**
      We stringify this and have it run on the worker, indexing our
      documents. Because it is stringified it may only use its
      arguments plus the 'getTitle' and 'getUrl_' variables that
      index() places in the worker script.
    */
    indexWorker_: function(a, b, message) {
      var args = message.body;
      var indexMe = args.indexMe;
      var i, entry;

      // generate titles
      for (i = 0; i < indexMe.length; i++) {
        entry = indexMe[i];
        entry.title = getTitle(entry.url, entry.mimeType, entry.content);
      }

      // index them into the database
      var db = google.gears.factory.create('beta.database');
      db.open(args.dbName);
      try {
        db.execute('BEGIN');
        for (i = 0; i < indexMe.length; i++) {
          entry = indexMe[i];
          // Thibaud: replaced entry.url by getUrl_(entry.url)
          var row = [getUrl_(entry.url), entry.title,
                     entry.mimeType, entry.content];
          db.execute('INSERT INTO ClientSearch (url, title, mimeType, content) '
                     + 'VALUES (?, ?, ?, ?)', row);
          google.gears.workerPool.sendMessage('Tick', message.sender);
        }
        db.execute('COMMIT');
      } finally {
        db.close();
      }

      google.gears.workerPool.sendMessage('Done', message.sender);
    },

    /**
      Works out an appropriate title for a document: the contents of
      its title tag for HTML/XHTML, otherwise the URL; at most 60
      characters, with '...' appended only when something was cut off.
      Written to be stringified for a worker, so it must stay
      self-contained. (index() currently ships retitle_ instead.)

      @param url : String URL to document.
      @param mimeType : String MIME type of this document
      @param doc : String Document's contents.
    */
    getTitle_: function(url, mimeType, doc) {
      var maxLength = 60;

      // handle HTML, XML, and text differently
      var title = url;
      if (mimeType == 'application/xhtml+xml' || mimeType == 'text/html') {
        // search for a title tag
        var titleRE = /<\s*title\s*>\s*([^<]*)\s*<\s*\/\s*title\s*>/im;
        var m = doc.match(titleRE);
        if (m && m.length > 1) {
          title = m[1];
        }
      }

      if (title.length > maxLength) {
        title = title.substring(0, maxLength) + '...';
      }

      return title;
    },

    /**
      Thibaud: re-title the M3 API Repository based on the URL, because
      it has no titles. Stringified for the index worker, so it must
      stay self-contained.

      @param url : String URL to document.
      @param mimeType : String Unused.
      @param doc : String Unused.
    */
    retitle_: function(url, mimeType, doc) {
      var parts, fileParts, program, transaction;

      if (url.indexOf("/reposit.html") > 0) {
        // it's the main page
        return "M3 API Repository";
      } else if (url.indexOf("/programs.html") > 0) {
        // it's the list of programs
        return "M3 API Repository: Programs";
      } else if (url.indexOf("/components.html") > 0) {
        // it's the list of components
        return "M3 API Repository: Components";
      } else if (url.indexOf("_") == -1) {
        // it's the program details
        parts = url.split("/");
        program = parts[parts.length - 1].replace(".html", "");
        return program + ": Details of program";
      } else if (url.indexOf("_fields.html") > 0) {
        // it's the list of input/output parameters
        parts = url.split("/");
        fileParts = parts[parts.length - 1].split("_");
        program = fileParts[0];
        transaction = fileParts[1];
        return program + "." + transaction + "(): INPUT/OUTPUT fields";
      } else if (url.indexOf("_trans.html") > 0
                 && (url.indexOf("_") != url.lastIndexOf("_"))) {
        // it's the transaction details
        parts = url.split("/");
        fileParts = parts[parts.length - 1].split("_");
        program = fileParts[0];
        transaction = fileParts[1].replace("trans.html", "");
        return program + "." + transaction + "(): Details of transaction";
      } else if (url.indexOf("_trans.html") > 0
                 && (url.indexOf("_") == url.lastIndexOf("_"))) {
        // it's the list of methods
        parts = url.split("/");
        program = parts[parts.length - 1].replace("_trans.html", "");
        return program + ": list of transactions";
      } else {
        // the url is unknown; return as is
        return url;
      }
    },

    /**
      Thibaud: replace the URL from my site's repository to the Lawson
      API Repository. Stringified for the index worker, so it must stay
      self-contained.

      @param url : String URL of the locally mirrored page.
      @returns String The corresponding Lawson URL, or the URL itself
      if its shape is not recognized.
    */
    getUrl_: function(url) {
      var cgi = "http://www.lawson.com/cgi-bin/Reposit.exe?cfgFile=/lotus/domino/sestw034/domino/cgi-bin/Rep13.cfg";
      var parts, fileParts, program, transaction;

      if (url.indexOf("/reposit.html") > 0) {
        // it's the main page
        return "http://www.lawson.com/Reposit13/reposit.html";
      } else if (url.indexOf("/programs.html") > 0) {
        // it's the list of programs
        return cgi + "&action=LIST&reqtype=program&PView=1";
      } else if (url.indexOf("/components.html") > 0) {
        // it's the list of components
        return cgi + "&action=LIST&reqtype=program&PView=0";
      } else if (url.indexOf("_") == -1) {
        // it's the program details
        parts = url.split("/");
        program = parts[parts.length - 1].replace(".html", "");
        return cgi + "&MINM=" + program + "&reqtype=program";
      } else if (url.indexOf("_fields.html") > 0) {
        // it's the list of input/output parameters
        parts = url.split("/");
        fileParts = parts[parts.length - 1].split("_");
        program = fileParts[0];
        transaction = fileParts[1];
        return cgi + "&MINM=" + program + "&TRNM=" + transaction
               + "&action=LIST&reqtype=field";
      } else if (url.indexOf("_trans.html") > 0
                 && (url.indexOf("_") != url.lastIndexOf("_"))) {
        // it's the transaction details
        parts = url.split("/");
        fileParts = parts[parts.length - 1].split("_");
        program = fileParts[0];
        transaction = fileParts[1].replace("trans.html", "");
        return cgi + "&MINM=" + program + "&TRNM=" + transaction
               + "&reqtype=trans";
      } else if (url.indexOf("_trans.html") > 0
                 && (url.indexOf("_") == url.lastIndexOf("_"))) {
        // it's the list of methods
        parts = url.split("/");
        program = parts[parts.length - 1].replace("_trans.html", "");
        return cgi + "&MINM=" + program + "&action=LIST&reqtype=trans";
      } else {
        // the url is unknown; return as is
        return url;
      }
    }
  });

  /**
    Searches through Gear's client-side database for documents that
    match a given query, producing results and a snippet for
    the document.
  */
  pu.declare('Searcher', null, {
    /**
      Using the given query, the client-side database (named by the
      enclosing 'dbName') is searched on a Gears worker. This query is
      in the format that Gear's client-side database will take. See
      http://code.google.com/apis/gears/api_database.html#sqlite_fts
      for the query syntax.

      @param query : String A String query, such as "tomatoes" to search
      for an instance of this word on all columns, or "name:tomatoes" to
      just search for documents with tomatoes in their name column.
      @param callback : Function Given an Array of results on the
      callback. Each array entry is an object literal with the following:
        * href : String URL to this resource
        * title : String Human readable title for this document
        * snippet : String A snippet of this document
    */
    search: function(query, callback) {
      // do things on a worker so we don't block the browser

      // the code to run on the worker; it is stringified, so it may
      // only use its arguments and the variables placed in the worker
      // script below
      var workerScript = function(a, b, message) {
        var params = message.body;
        var query = params.query;
        var dbName = params.dbName;
        var results = [];

        var db = google.gears.factory.create('beta.database');
        db.open(dbName);

        // FIXME: Don't allow advanced queries, maybe backslashing
        // special terms in there -- only allow normal string search;
        // this is so the snippet generation will work correctly
        var rs = null;
        try {
          rs = db.execute('SELECT * FROM ClientSearch WHERE '
                          + 'content MATCH ?', [query]);
          while (rs.isValidRow()) {
            var entry = {};
            // passed through untouched: the UI renders the title as a
            // text node, so no escaping is wanted here
            entry.title = rs.fieldByName('title');
            entry.href = rs.fieldByName('url');
            var mimeType = rs.fieldByName('mimeType');
            var content = rs.fieldByName('content');
            entry.snippet = getSnippet(query, mimeType, content);
            results.push(entry);
            rs.next();
          }
        } finally {
          if (rs) {
            rs.close();
          }
          db.close();
        }

        google.gears.workerPool.sendMessage(results, message.sender);
      };

      workerScript =
          'var escapeString = ' + String(this.escapeString_) + '; '
          + 'var getSnippet = ' + String(this.getSnippet_) + '; '
          + 'google.gears.workerPool.onmessage = ' + workerScript;

      // where we get the results from the worker
      var worker = google.gears.factory.create('beta.workerpool');
      worker.onmessage = function(a, b, message) {
        var entries = message.body;
        callback(entries);
      };
      var childWorkerId = worker.createWorker(workerScript);
      // an object body (as Indexer uses) instead of a ':::'-joined
      // string, which broke for queries containing ':::'
      worker.sendMessage({query: query, dbName: dbName}, childWorkerId);
    },

    /**
      Stringified for search worker. Taken from dojo._escapeString,
      which is under a BSD license.

      Adds escape sequences for non-visual characters, double quote and
      backslash and surrounds with double quotes to form a valid string
      literal.
    */
    escapeString_: function(str) {
      return ('"' + str.replace(/(["\\])/g, '\\$1') + '"').
        replace(/[\f]/g, "\\f").replace(/[\b]/g, "\\b").replace(/[\n]/g, "\\n").
        replace(/[\t]/g, "\\t").replace(/[\r]/g, "\\r"); // string
    },

    /**
      Stringified for search worker, so it must stay self-contained.

      Extracts an appropriate snippet from some content based on a
      query and bolds the query term. Note that this only works with
      space-delimited languages, like English, for now.

      @param query : String The search string.
      @param mimeType : String MIME type of the document; anything
      other than 'text/plain' has its tags stripped first.
      @param content : String The document's contents.
      @returns String An HTML fragment with the first occurrence of the
      query wrapped in bold tags, or '' if the query does not occur
      literally in the content.
    */
    getSnippet_: function(query, mimeType, content) {
      // how many characters before and after query term to grab
      var proximity = 60;
      // one or more leading / trailing spaces or punctuation characters
      var leadingJunkRE =
          /^[\s\~`!@#\$%\^&\*\(\)\-\+=\[\]{}:;"'<>\?,\.\/\\\|]+/;
      var trailingJunkRE =
          /[\s\~`!@#\$%\^&\*\(\)\-\+=\[\]{}:;"'<>\?,\.\/\\\|]+$/;

      if (!query || !content) {
        return '';
      }

      // throw away XML/HTML tags first, so that every position computed
      // below refers to the text we actually cut the snippet from
      var isPlainText = (mimeType == 'text/plain');
      if (!isPlainText) {
        // FIXME: TODO: truly remove tag contents below
        content = content.split(/<\s*[^>]*\s*>/).join(' ');

        // get rid of spurious spaces left over after some tags are removed
        content = content.replace(/\s{2,}/g, ' ');
        content = content.replace(/^\s/, '');
        content = content.replace(/\s$/, '');
      }

      // full-text matching ignores case, so look for the term the same
      // way -- unless lower-casing changes a string's length, in which
      // case the positions would no longer line up
      var haystack = content.toLowerCase();
      var needle = query.toLowerCase();
      if (haystack.length != content.length
          || needle.length != query.length) {
        haystack = content;
        needle = query;
      }

      var pos = haystack.indexOf(needle);
      if (pos == -1) {
        return '';
      }

      // determine the cuts, keeping the match separate so that the
      // clean-up below can never eat into it
      var matchEnd = pos + query.length;
      var start = Math.max(0, pos - proximity);
      var end = Math.min(content.length, matchEnd + proximity);
      var before = content.substring(start, pos);
      var match = content.substring(pos, matchEnd);
      var after = content.substring(matchEnd, end);

      // the first word might be a fragment (we cut in the middle of
      // it); throw it away
      if (start > 0 && /\w/.test(content.charAt(start - 1))) {
        before = before.replace(/^\w+/, '');
      }

      // same with the last word
      if (end < content.length && /\w/.test(content.charAt(end))) {
        after = after.replace(/\w+$/, '');
      }

      // strip off leading and ending punctuation and spaces
      before = before.replace(leadingJunkRE, '');
      after = after.replace(trailingJunkRE, '');

      // the result is assigned to innerHTML by the UI, so neutralize
      // markup characters. Ampersands are only escaped for plain text:
      // stripped HTML/XML still carries its own entities.
      var escapeMarkup = function(text) {
        if (isPlainText) {
          text = text.replace(/&/g, '&amp;');
        }
        return text.replace(/</g, '&lt;').replace(/>/g, '&gt;');
      };

      // surround query term with bold
      // NOTE(review): the bold tags were lost in this copy of the file
      // (the replacement strings were empty); restored per the comment
      // above
      return escapeMarkup(before)
             + '<b>' + escapeMarkup(match) + '</b>'
             + escapeMarkup(after);
    }
  });

  // makes SearchTools global variable
  searchTools = new SearchTools();

  return searchTools;
}(); // end function() {} to make everything anonymous